********************************************************************************
********************************************************************************
//The Political Life Cycle and Electoral Mobilisation among Immigrant-Origin 
//and Native Citizens:
//Survey Evidence from the 2021 German Election

//replication Dofile

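//required packages:
//ssc install fitstat
//ssc install coefplot
//ssc install estout        (provides esttab, used to export the regression tables)
//ssc install blindschemes  (provides the plotplain scheme used in the coefficient plot)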
********************************************************************************
********************************************************************************

*session setup: pin the Stata version, empty memory, switch off output paging
version 18.5
clear all
set more off

*graph defaults for the session
graph set window fontface "Times New Roman"
set scheme s2mono

*working directory: replace YOURPATH with the folder that holds the replication data
cd "YOURPATH"

*load the replication data set
use "goerres_elis_mayer_political_life_cycle_replication_data.dta", clear

***************************
*recode dependent variables
***************************

*polint: copy item pr011 for waves 1-3 and reverse its 1-5 coding
forvalues wave = 1/3 {
	gen polint_w`wave' = pr011_w`wave'
	recode polint_w`wave' (1=5)(2=4)(3=3)(4=2)(5=1)
	label variable polint_w`wave' "political interest"
}
*fill a missing wave with the value of the preceding wave (w1 -> w2, then w2 -> w3)
forvalues wave = 2/3 {
	local prev = `wave' - 1
	replace polint_w`wave' = polint_w`prev' if polint_w`wave' ==.
}

*ease of voting: code 97 becomes missing, the 1-4 coding is reversed
gen voteease = pr095a
recode voteease (97 =.)(1=4)(2=3)(3=2)(4=1)
label variable voteease "ease of voting"

*propensity to participate: reversed 1-5 coding of pr002a_w2
recode pr002a_w2 (1=5)(2=4)(3=3)(4=2)(5=1), gen(ptp)
label variable ptp "propensity to participate"

*retrospective turnout: 1 stays 1, 2 becomes 0
gen turnout = pr089_w3
recode turnout (1=1)(2=0)
label variable turnout "post electoral turnout (1='yes')"

*****************************
*recode independent variables
*****************************

*occupation status: codes 1, 4 and 7 of pr027_w1 count as 1, codes 2 3 5 6 8 97 as 0
gen occ = pr027_w1
recode occ (1 4 7 = 1)(2 3 5 6 8 97 = 0)
label variable occ "Occupation 1='yes'"

*income
*income  = metric answer (pr090a_w1), income2 = bracketed answer (pr090b_w1)
gen income = pr090a_w1
gen income2 = pr090b_w1
*codes 97 and 98 are set to missing in both items
recode income income2 (97 98 =.)
*translate the bracket codes into euro amounts so they can stand in for the metric answer
recode income2 (1=375) (2=1125) (3=1750) (4=2250) (5=2750) (6=3500) (7=4500) (8=6500) (9=12000)
replace income = income2 if income ==.

*four income groups
gen income_cat =.
replace income_cat = 0 if income < 1500
replace income_cat = 1 if income >= 1500 & income < 2700
replace income_cat = 2 if income >= 2700 & income < 4000
*Stata sorts missing values above every number, so the open-ended top group has to
*exclude them explicitly; otherwise respondents without any income answer end up in group 3.
*NOTE(review): respondents with missing income now keep a missing income_cat (and hence a
*missing ses) instead of being counted in the top group; estimates that use ses can differ
*from runs of the earlier coding.
replace income_cat = 3 if income >= 4000 & !missing(income)

*household size, with missing values replaced by the sample mean
gen hhsize = pr028a_w1
	sum hhsize
	replace hhsize = r(mean) if hhsize ==.

*divide the income group code by household size
replace income_cat = income_cat / hhsize
label variable income "income metric"
label variable income2 "income categories (survey answers)"
label variable income_cat "income catogorized"
label variable hhsize "household size"

*political knowledge
*one 0/1 indicator per quiz item: 1 for the listed answer code, 0 for everything else
*(including missing answers)
recode pr043a_w1 (3=1) (else=0), gen(polknow_bk01)
recode pr043b_w1 (2=1) (else=0), gen(polknow_eu01)
recode pr043ca_w1 (7=1) (else=0), gen(polknow_afd01)
recode pr043cb_w1 (5=1) (else=0), gen(polknow_linke01)
recode pr043cc_w1 (4=1) (else=0), gen(polknow_fdp01)

*frequency table of every indicator
foreach item in bk01 eu01 afd01 linke01 fdp01 {
	tab polknow_`item'
}

*index = share of the five items scored 1
gen polknow=(polknow_bk01+polknow_eu01+polknow_afd01+polknow_linke01+polknow_fdp01)/5
label variable polknow "Political knowledge index"

*education
*codes 97 and 98 are folded into category 6 (changes pr025_w1 in place)
replace pr025_w1 = 6 if inlist(pr025_w1, 97, 98)

*value labels for the survey categories
label define pr025 ///
1	"no graduation" ///
2	"compuls. basic secondary schooling" ///
3	"intermediate school-leaving certificate" ///
4	"specialised A-levels" ///
5	"higher ed. entrance qual. (A-levels)" ///
6	"other graduation" ///
7	"student", replace
label values pr025_w1 pr025

*labelled copy of the survey categories
gen education = pr025_w1
label values education pr025
label variable education "Formal Education (survey categories)"

*0-4 score; categories 3, 6 and 7 share the value 2
recode education (1=0)(2=1)(3 6 7=2)(4=3)(5=4), gen(education_metric)
label variable education_metric "Formal education"

*socioeconomic status (ses)
*step 1: rescale each component to the 0-1 range using its observed minimum and maximum
foreach component in income_cat education_metric occ {
	sum `component'
	local lo `r(min)'
	local hi `r(max)'
	replace `component' = (`component' - `lo') / (`hi' - `lo')
}

*step 2: average the three rescaled components
gen ses = (income_cat + education_metric + occ)/3

*step 3: rescale the average itself to the 0-1 range
sum ses
local lo `r(min)'
local hi `r(max)'
replace ses = (ses - `lo') / (`hi' - `lo')
label variable ses "Socioeconomic status"

*partner
*1 if pr080_w1 equals 1, 0 for larger values other than the system missing value
gen partner = 1 if pr080_w1 ==1
replace partner = 0 if pr080_w1 >1 & pr080_w1 !=.
label variable partner "Partner (yes/no)"

*discussion partner
*both source items get code 2 set to 0 before they are added up
recode pr047a_w1 (2=0), gen(dicpartner1)
recode pr048a_w1 (2=0), gen(dicpartner2)
gen discpartner = dicpartner1 + dicpartner2
*a missing sum (either item missing) is counted as zero discussion partners
replace discpartner = 0 if discpartner ==.
label variable discpartner "Num. of political discussion partners"
label define discpartner 0 "Num. of pol. discussion partners: 0" 1 "Num. of pol. discussion partners: 1" 2 "Num. of pol. discussion partners: 2"
label values discpartner discpartner

*one 0/1 indicator per count (discpartner0, discpartner1, discpartner2)
forvalues k = 0/2 {
	gen discpartner`k' = (discpartner == `k')
}

*social organisation membership: code 2 becomes 0, everything else is kept
gen socorgamem = pr033_w1
recode socorgamem (2=0)
label variable socorgamem "member of social organisation"

*union membership: 1 if pr031_w1 equals 1, otherwise 0 (missing answers included)
gen unionmem = (pr031_w1 == 1)
label variable unionmem "union member"

*socagent: origin
*1 if pr086b_w2 equals 1 or on003 equals 0, otherwise 0
gen socagent_orig = (pr086b_w2 ==1 | on003 ==0)
label variable socagent_orig "Soc. agent German citizen 1='yes'"

*socagent: polint
*reversed 1-5 coding; codes 97 and 98 become missing
gen socagent_polint = pr086d_w2
recode socagent_polint (1=5)(2=4)(3=3)(4=2)(5=1)(97 98 =.)
label variable socagent_polint "Political interest soc. agent"

*socagent: education
label define pr086c ///
1	"no graduation" ///
2	"compuls. basic secondary schooling" ///
3	"intermediate school-leaving certificate" ///
4	"specialised A-levels" ///
5	"higher ed. entrance qual. (A-levels)" ///
6	"other graduation", replace
label values pr086c_w2 pr086c

*labelled copy of the survey categories
gen socagent_education = pr086c_w2
label values socagent_education pr086c

*0-4 score; categories 1 and 6 share the value 0
recode socagent_education (1 6=0)(2=1)(3=2)(4=3)(5=4), gen(socagent_education_metric)
label variable socagent_education_metric "Education soc. agent"

*indicator for the top score (4); every other value, missing included, is 0
gen socagent_highedu = (socagent_education_metric == 4)
label variable socagent_highedu "Soc. agent higher education entrance 1='yes'"

*campaign contact
*set missing answers on the eight pr084d items to 0 (in place) and add the items up
local contact_sum 0
forvalues item = 1/8 {
	replace pr084d_`item'_w2 = 0 if pr084d_`item'_w2 ==.
	local contact_sum `contact_sum' + pr084d_`item'_w2
}
gen campaign_contact = `contact_sum'
label variable campaign_contact "Number of contacts with party campaigners"

*context: borough turnout
label variable co_turnout2017 "Turnout 2017 Bundestag election in borough"

*context: campaign activity
*sum of the six party-specific campaign variables
local activity_sum 0
foreach party in cdu spd fdp thegreens theleft afd {
	local activity_sum `activity_sum' + co_`party'_campaign
}
gen campaign2 = `activity_sum'
label variable campaign2 "Bundestag parties campaign activity in borough"

*context: country of origin

*compute year of socialisation: 2021 minus the number of years since age 16
gen socyear = 2021 - (age - 16)

*impute country status for year of socialisation
*pick the yearly Polity value that matches socyear (yearly variables 1946-2018)
gen polity_socage =.
forvalues year = 1946/2018 {
	replace polity_socage = polity_`year' if socyear == `year'
}

*pick the yearly GDP value that matches socyear (yearly variables 1900-2021)
gen gdp_socage =.
forvalues year = 1900/2021 {
	replace gdp_socage = gdp_`year' if socyear == `year'
}

label variable polity_socage "Polity5 index at 16 yrs"
label variable gdp_socage "GDP per capita at 16 yrs"

*logged GDP
gen gdp_socage_log = log(gdp_socage)
label variable gdp_socage_log "GDP per capita at 16 yrs (log)"

*logged Polity score; polity_socage itself is shifted by 0.01 before the log is taken
*NOTE(review): log() returns missing for values that are still <= 0 after the shift,
*so negative Polity scores get a missing log here - confirm this is intended
replace polity_socage = polity_socage + 0.01
gen polity_socage_log = log(polity_socage)
label variable polity_socage_log "Polity5 index at 16 yrs (log)"


***********
*imputation	
***********

*independent variables: replace missing values by the sample mean of the variable
foreach var of varlist socagent_polint polity_socage polity_socage_log gdp_socage gdp_socage_log co_turnout2017 {
	sum `var'
	replace `var' = r(mean) if `var' ==.
}

*NOTE(review): polity_socage_log was computed before this override and is not refreshed
*here - confirm whether the logged variable should follow the override as well
replace polity_socage =10 if ts006c =="DEU" //because germany always scored high

*back to whole numbers, then split into "10" versus "below 10"
replace polity_socage=round(polity_socage, 1)
recode polity_socage (-10/-6=0) (-5/9=0) (10=1), gen(polity_socage_cat)
label variable polity_socage_cat "Polity5 index at 16 yrs (catogorized)"
label define polity_socage_cat 0 "Not fully democratic" 1 "Fully democratic"
label values polity_socage_cat polity_socage_cat

*GDP groups with contiguous cut-offs: the earlier integer ranges (0/15000, 15001/30000,
*30001/100000) left values between the ranges, and values above 100000, with their raw
*GDP figure instead of a group code
gen gdp_socage_cat =.
replace gdp_socage_cat = 0 if gdp_socage >= 0 & gdp_socage <= 15000
replace gdp_socage_cat = 1 if gdp_socage > 15000 & gdp_socage <= 30000
replace gdp_socage_cat = 2 if gdp_socage > 30000 & !missing(gdp_socage)
label variable gdp_socage_cat "GDP per capita at 16 yrs (catogorized)"
label define gdp_socage_cat 0 "Low" 1 "Medium" 2 "High"
label values gdp_socage_cat gdp_socage_cat

*0/1 indicators for the medium and the high group
gen gdp_socage_cat_med =0 
replace gdp_socage_cat_med =1 if gdp_socage_cat ==1
gen gdp_socage_cat_high =0 
replace gdp_socage_cat_high =1 if gdp_socage_cat ==2

	
*dependent variables: replace missing values by the median of the variable
foreach var of varlist ptp polint_w1 voteease {
	quietly sum `var', detail
	replace `var' = r(p50) if `var' ==.
}


*************
*descriptives
*************

*Table 3: Coding and Distributions of Independent Variables. Weighted Estimates.
*each variable is regressed on the immigrant indicator with analytic weights
foreach outcome in socagent_orig socagent_highedu socagent_polint polity_socage_log gdp_socage_log ses polknow discpartner0 discpartner1 discpartner2 campaign2 campaign_contact co_turnout2017 {
	reg `outcome' immigrant [aw=weight]
}
//note: python, excel and ms word used for final table creation.

*standardize vars to min 0 and max 1 for comparability of coefficients
*(runs after the Table 3 regressions above, which therefore use the unscaled variables)
foreach v in ptp voteease polint_w1 polint_w3 age socagent_polint socagent_education_metric campaign_contact co_turnout2017 education_metric campaign2 gdp_socage gdp_socage_log polity_socage polity_socage_log {
	sum `v'
	local lo `r(min)'
	local hi `r(max)'
	replace `v' = (`v' - `lo') / (`hi' - `lo')
}
	

*******************************
*local theoretical perspectives
*******************************

*regressor blocks shared by all models below; as locals they only exist while this
*do-file runs in one go
*controls: age, sex, partner and the origin-group indicators
local controls age male partner i.onomgroup
*social: socialisation-agent traits and country context at age 16
local social socagent_orig socagent_highedu socagent_polint polity_socage_log gdp_socage_log
*ses: socioeconomic status index and political knowledge index
local ses ses polknow
*mobil: campaign contact, borough context and discussion partners (base category 0)
local mobil campaign_contact campaign2 co_turnout2017 ib0.discpartner

	
**********************
*multivariate analyses
**********************

*Table A.2: OLS Regression on Political Interest Measured in Telephone Survey Wave 3
*right-hand sides of the pooled models m0-m6
local rhs0 i.onomgroup
local rhs1 `controls'
local rhs2 `social' `controls'
local rhs3 `ses' `controls'
local rhs4 `mobil' `controls'
local rhs5 `social' `ses' `controls'
local rhs6 `social' `ses' `mobil' `controls'

*pooled models: probability weights with robust standard errors, stored as m0_polint ... m6_polint
forvalues m = 0/6 {
	reg polint_w3 `rhs`m'' [pw=weight], vce(robust)
	est store m`m'_polint
	fitstat
}

*full model fitted separately for natives (immigrant==0) and immigrants (immigrant==1);
*importance weights, no robust option; these stored results feed the hausman/suest tests
local rhs_split socagent_orig socagent_highedu socagent_polint gdp_socage_log `ses' campaign_contact campaign2 co_turnout2017 discpartner1 discpartner2 `controls'
local grp0 natives
local grp1 mig
forvalues g = 0/1 {
	reg polint_w3 `rhs_split' [iw=weight] if immigrant ==`g'
	est store m6_polint_`grp`g''
	fitstat
}

*export the pooled models m1-m6 as an rtf table
esttab m1_polint m2_polint m3_polint m4_polint m5_polint m6_polint using "polint.rtf", replace b(2) se(2) r2 ar2 star(* 0.1 ** 0.05 *** 0.01) label nogaps title("political interest (w3)") nodepvars

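*Table A.4: OLS Regression on Reported Ease of Voting Measured in Telephone Survey Wave 3 (the models below use voteease; table number inferred from the variable, confirm against the paper)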
*OLS models for reported ease of voting (voteease)
*right-hand sides of the pooled models m0-m6
local rhs0 i.onomgroup
local rhs1 `controls'
local rhs2 `social' `controls'
local rhs3 `ses' `controls'
local rhs4 `mobil' `controls'
local rhs5 `social' `ses' `controls'
local rhs6 `social' `ses' `mobil' `controls'

*pooled models: probability weights with robust standard errors, stored as m0_voteease ... m6_voteease
forvalues m = 0/6 {
	reg voteease `rhs`m'' [pw=weight], vce(robust)
	est store m`m'_voteease
	fitstat
}

*full model fitted separately for natives (immigrant==0) and immigrants (immigrant==1);
*importance weights, no robust option; these stored results feed the hausman/suest tests
local rhs_split socagent_orig socagent_highedu socagent_polint gdp_socage_log `ses' campaign_contact campaign2 co_turnout2017 discpartner1 discpartner2 `controls'
local grp0 natives
local grp1 mig
forvalues g = 0/1 {
	reg voteease `rhs_split' [iw=weight] if immigrant ==`g'
	est store m6_voteease_`grp`g''
	fitstat
}

*export the pooled models m1-m6 as an rtf table
esttab m1_voteease m2_voteease m3_voteease m4_voteease m5_voteease m6_voteease using "voteease.rtf", replace b(2) se(2) r2 ar2 star(* 0.1 ** 0.05 *** 0.01) label nogaps title("Ease of voting (w3)") nodepvars

*Table A.3: OLS Regression on Propensity to Vote Measured in Telephone Survey Wave 2 (the models below use ptp; table number inferred from the variable, confirm against the paper)
*OLS models for the propensity to participate (ptp)
*right-hand sides of the pooled models m0-m6
local rhs0 i.onomgroup
local rhs1 `controls'
local rhs2 `social' `controls'
local rhs3 `ses' `controls'
local rhs4 `mobil' `controls'
local rhs5 `social' `ses' `controls'
local rhs6 `social' `ses' `mobil' `controls'

*pooled models: probability weights with robust standard errors, stored as m0_ptp ... m6_ptp
forvalues m = 0/6 {
	reg ptp `rhs`m'' [pw=weight], vce(robust)
	est store m`m'_ptp
	fitstat
}

*full model fitted separately for natives (immigrant==0) and immigrants (immigrant==1);
*importance weights, no robust option; these stored results feed the hausman/suest tests
local rhs_split socagent_orig socagent_highedu socagent_polint gdp_socage_log `ses' campaign_contact campaign2 co_turnout2017 discpartner1 discpartner2 `controls'
local grp0 natives
local grp1 mig
forvalues g = 0/1 {
	reg ptp `rhs_split' [iw=weight] if immigrant ==`g'
	est store m6_ptp_`grp`g''
	fitstat
}

*export the pooled models m1-m6 as an rtf table
esttab m1_ptp m2_ptp m3_ptp m4_ptp m5_ptp m6_ptp using "ptp.rtf", replace b(2) se(2) r2 ar2 star(* 0.1 ** 0.05 *** 0.01) label nogaps title("propensity to participate (w2)") nodepvars

*Figure 1: Coefficient Plot for Three Dependent Variables
*one panel per stored full model (m6_polint, m6_ptp, m6_voteease), side by side in one row
*the plotplain scheme is not shipped with Stata (ssc install blindschemes)
*a former "*gdp_socage_cat=..." entry inside headings() was dropped: within a /// continuation
*a leading * is not a comment but a wildcard coefficient pattern, and no model contains a
*coefficient of that name
coefplot m6_polint, bylabel(Political interest) || m6_ptp, bylabel(Prop to vote) || m6_voteease, bylabel(Ease of voting) ||, ///
	scheme(plotplain) ///
	plotregion(margin(zero)) ///
	graphregion(margin(zero)) ///
	levels(95) ///
	msymbol(O) ///
	headings(age="{bf:Controls}" ///
	1.onomgroup="{it:Origin-group (ref.: natives)}" ///
	socagent_orig="{bf:Phase 1 adolescence}" ///
	ses="{bf:Phase 2 periods of education and working}" ///
	campaign_contact="{bf:Phase 3 the election campaign}" ///
	1.discpartner="{it:Numb. of pol. discussion partners (ref.: 0)}") ///
	xline(0, lpattern(solid)) ///
	drop(_cons) ///
	ci(95) ///
	xlabel(-0.2(0.2)0.8, grid) ///
	xscale(range(-0.2 0.4)) ///
	xsize(7) ///
	byopts(row(1)) ///
	name(allmodels, replace)


*****************************************************		
*robustness checks, assumptions and additional models
*****************************************************

*Table A.6: Hausman Test for Similarity of Regression Coefficients Between Separated Models for Natives and Immigrants with Dependent Variable Political Interest
*Table A.7: Hausman Test for Similarity of Regression Coefficients Between Separated Models for Natives and Immigrants with Dependent Variable Propensity to Vote
*Table A.8: Hausman Test for Similarity of Regression Coefficients Between Separated Models for Natives and Immigrants with Dependent Variable Reported Ease of Voting

*coefficients compared between the natives model and the immigrants model
local shared_terms age male partner socagent_highedu socagent_polint ses polknow campaign_contact campaign2 co_turnout2017 discpartner1 discpartner2 gdp_socage_log

*same three steps per outcome, in table order (A.6 polint, A.7 ptp, A.8 voteease):
*hausman on the two stored subgroup models, suest to combine them, then one test
*per coefficient that the natives and immigrants estimates are equal
foreach dv in polint ptp voteease {
	hausman m6_`dv'_natives m6_`dv'_mig, alleqs constant
	suest m6_`dv'_natives m6_`dv'_mig
	foreach term of local shared_terms {
		test [m6_`dv'_natives_mean]`term'-[m6_`dv'_mig_mean]`term' = 0
	}
}


******************************************************************
*robustness checks, model assumptions and alternative model setups
******************************************************************

*Table A.9: VIF Values and Breusch–Pagan/Cook–Weisberg Test for Heteroskedasticity for All Three Dependent Variables (Political Interest, Propensity to vote, Reported Ease of Voting) and All Independent Variables
*the full model is refitted without weights for each outcome, followed by the two diagnostics
foreach dv in polint_w3 voteease ptp {
	quietly reg `dv' `social' `ses' `mobil' `controls'
	estat vif
	hettest
}

*alternative setup: logit of reported turnout on the full set of regressors
*(probability weights, robust standard errors)
logit turnout `controls' `social' `ses' `mobil' [pw=weight], robust
fitstat